Installation

CMAP hosts data on a Microsoft SQL Server. An R user will need the following package to connect to the database: a) “DBI”, for database interface; b) “odbc” for connecting to the database using DBI interface.

But before that, for the Mac operating system, a user needs to install the unixODBC library and database drivers. We suggest using SQL Server ODBC drivers (Free TDS). Using Homebrew, run the following commands to install the suggested module.

In case of Linux operating system, first, install Anaconda distribition, and then run the following commands to install suggested module.

Please follow the link to see other drivers available for installation.

In addition, a user may require some additional R package for downloading, processing and visualizing the data. Run the following commands to install some of the essential packages.

## Package "DBI" provide interface to the database
install.packages("DBI")

## Driver for the database
install.packages("odbc")

## Package for data processing:
install.packages("dbplyr")  
install.packages("plyr")

## Package for visualization:
install.packages("ggplot2")
install.packages("plotly")

Connecting to the database

User can connect to the database using the login credentials (Direct connect) suggested in the examples.

Direct connection

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)

con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number
dbDisconnect(con)

Peeking into the table

Attributes of a table includes:

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

## Choose table:
table.name <-  "tblsst_AVHRR_OI_NRT"


## Variable name in the table
# tbl.fields <- dbListFields(con,table.name)
# print(tbl.fields)


## collect sample data 
tbl.fields <- getDataSample(con,table.name,n=5)
print(tbl.fields) 
##       lat      lon       time sst ID
## 1 -89.875 -179.875 2017-12-03  NA  1
## 2 -89.875 -179.625 2017-12-03  NA  2
## 3 -89.875 -179.375 2017-12-03  NA  3
## 4 -89.875 -179.125 2017-12-03  NA  4
## 5 -89.875 -178.875 2017-12-03  NA  5
## Class of each column in the table
tbl.colClass <- getColClass(con,table.name)
print(tbl.colClass)
##   Variable      Type
## 1      lat   numeric
## 2      lon   numeric
## 3     time      Date
## 4      sst   numeric
## 5       ID integer64
## Number of observations
nObs <- getObservationCount(con,table.name)
nObs
## [1] 1760215552
## Space/time information of the table
# tbl.spaceTimeInfo <- getSpaceTimeRange(con,table.name)
# print(tbl.spaceTimeInfo)



## Numeric variable range:
# tbl.rangeNumVar <- getRangeNumVar(con,table.name)
# print(tbl.rangeNumVar)
  

dbDisconnect(con)

Attributes - Subset of a table

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number


## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT'                    # table name

sel.var = 'sst'                                       # choose variable 
range.var <- list()                                   # Range variable information 
range.var$time <- c('2016-04-30', '2016-04-30')
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)



## Space/time/number of observations/summary
tbl.subsetSpaceTimeSummary <- getSubsetSpaceTimeRange(con,table.name,range.var)
print(tbl.subsetSpaceTimeSummary)
## # A tibble: 1 x 6
##   time_min   lat_min lon_min time_max   lat_max lon_max
##   <date>       <dbl>   <dbl> <date>       <dbl>   <dbl>
## 1 2016-04-30    10.1   -180. 2016-04-30    69.9   -80.1
# Summary of the data:
tbl.subsetSummary <- getSubsetRangeNumVar(con, table.name, range.var)
print(tbl.subsetSummary)
##   Variable         min       max        sd
## 1      lat   10.125000  69.87500 17.320448
## 2      lon -179.875000 -80.12500 28.867574
## 3      sst   -1.800006  31.41999  9.571941
dbDisconnect(con)

Data retrieval and preprocessing

To retrieve the data from CMAP, a user need to specify following parameters:

Use getTableData function to download the data as data frame.

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number


## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT'      # table name
sel.var = 'sst'                         # choose variable 
range.var <- list()                     # Range variable [lat,lon,time]
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)
range.var$time <- c('2016-04-30', '2016-04-30')


## Subset selection:
tbl.subset <- getTableData(con, table.name, sel.var, range.var)
head(tbl.subset)
## # A tibble: 6 x 4
##     lat   lon time         sst
##   <dbl> <dbl> <date>     <dbl>
## 1  10.1 -180. 2016-04-30  27.9
## 2  10.1 -180. 2016-04-30  27.9
## 3  10.1 -179. 2016-04-30  27.8
## 4  10.1 -179. 2016-04-30  27.8
## 5  10.1 -179. 2016-04-30  27.8
## 6  10.1 -179. 2016-04-30  27.8
## Ordering of the numeric variable:
# orderby <- c('time','lat','lon')        # Specify orderby variable
# tbl.subset <- getTableData(con, table.name, sel.var, range.var,orderby)
# head(tbl.subset)


dbDisconnect(con)

Aggregate data

To avoid pulling large dataset from the CMAP server, especially when not required, user may be interested in pulling aggregated data. In addition to the essential input variables, a user need to specify the aggregate variable(agg.var).

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT'      # table name
sel.var = 'sst'                         # choose variable 
range.var <- list()                     # Range variable [lat,lon,time]
range.var$lat <- c(25,30)
range.var$lon <- c(-160,-155)
range.var$time <- c('2016-03-29', '2016-05-29')   


## Aggregate
agg.var <- 'time'                       # Specify aggregate variable
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset)
## # A tibble: 6 x 2
##   time         sst
##   <date>     <dbl>
## 1 2016-03-29  21.3
## 2 2016-03-30  20.6
## 3 2016-03-31  20.6
## 4 2016-04-01  20.7
## 5 2016-04-02  20.8
## 6 2016-04-03  20.9
dbDisconnect(con)

Colocalization

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

## Setup
# source = './MGL1704.csv'
# table.name = "tblSST_AVHRR_OI_NRT"
# sel.var = "sst"
# latMargin = 0.3
# lonMargin = 0.3
# timeMargin = 1


## For the desired table, query a variable therein
# res = matchSource_onetable(con, source, table.name, sel.var,
#                            latMargin, lonMargin, timeMargin)
dbDisconnect(con)

Visualization

Depth profile

Call plot_depth function to obtain plot_ly/ggplot object.

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number
#
# Inpit variable:
table.list <- c('tblArgoMerge_REP', 'tblPisces_NRT', 'tblDarwin_Chl_Climatology') 
var.list <-  c('argo_merge_chl_adj', 'CHL', 'chl01_darwin_clim')  
#
selIndex <- 1                                    # selected argo_merge_chl_adj from tblArgoMerge_REP 
table.name <- table.list[selIndex]
sel.var <- var.list[selIndex]  
#
range.var <- list()
range.var$lat <- c(20,24)
range.var$lon <- c( -170, -150)
range.var$depth <- c(0, 1500)
range.var$time <- c('2016-04-30', '2016-04-30')


## Subset selection: data retrieval
agg.var <- 'depth' 
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset)
## # A tibble: 6 x 2
##   depth argo_merge_chl_adj
##   <dbl>              <dbl>
## 1  4.10            NA     
## 2  4.20            NA     
## 3  5.90            NA     
## 4  6               NA     
## 5  7.20             0.0180
## 6  7.80            NA
## Plot -- Depth profiles:
p <- plot_depth(tbl.subset, 'plotly',sel.var)
p
dbDisconnect(con)

Regional map

Call plot_regMap function to obtain plot_ly/ggplot object.

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

# Inpit variable:
table.name = 'tblsst_AVHRR_OI_NRT'   
sel.var = 'sst'                 
range.var <- list()
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)
range.var$time <- c('2016-04-30', '2016-04-30')


## Data retrieval
tbl.subset <- getTableData(con, table.name, sel.var, range.var, order.var=  c('lat','lon'))
head(tbl.subset)
## # A tibble: 6 x 4
##     lat   lon time         sst
##   <dbl> <dbl> <date>     <dbl>
## 1  10.1 -80.1 2016-04-30  29.6
## 2  10.1 -80.4 2016-04-30  29.8
## 3  10.1 -80.6 2016-04-30  29.9
## 4  10.1 -80.9 2016-04-30  29.9
## 5  10.1 -81.1 2016-04-30  29.8
## 6  10.1 -81.4 2016-04-30  29.8
## Plot - regional map 
# out <- plot_regMap(con, table.name,sel.var,range.var, type = 'ggplot')
out <- plot_regMap(con, table.name,sel.var,range.var, type = 'plotly')
out$plot
dbDisconnect(con)

Time series plot

Call plot_ts function to obtain plot_ly/ggplot object.

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

# Input variable:
table.list <- c('tblSST_AVHRR_OI_NRT', 'tblAltimetry_REP', 'tblPisces_NRT')  
var.list <- c('sst', 'sla', 'NO3')   

selIndex <- 1                              # selected "sst" from the table "tblSST_AVHRR_OI_NRT"
table.name <- table.list[selIndex]
sel.var <- var.list[selIndex]   


## Example I:
range.var <- list()
range.var$lat <- c(25,30)
range.var$lon <- c(-160,-155)
range.var$time <- c('2016-03-29', '2016-05-29')


## Subset selection: data retrieval
agg.var <- 'time' 
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset,20)   
## # A tibble: 20 x 2
##    time         sst
##    <date>     <dbl>
##  1 2016-03-29  21.3
##  2 2016-03-30  20.6
##  3 2016-03-31  20.6
##  4 2016-04-01  20.7
##  5 2016-04-02  20.8
##  6 2016-04-03  20.9
##  7 2016-04-04  21.1
##  8 2016-04-05  20.9
##  9 2016-04-06  20.9
## 10 2016-04-07  20.7
## 11 2016-04-08  20.9
## 12 2016-04-09  21.0
## 13 2016-04-10  21.2
## 14 2016-04-11  21.1
## 15 2016-04-12  21.1
## 16 2016-04-13  21.1
## 17 2016-04-14  21.2
## 18 2016-04-15  21.4
## 19 2016-04-16  21.4
## 20 2016-04-17  21.4
## Plot -- Time series:
p <- plot_ts(tbl.subset,'plotly',sel.var)
p
dbDisconnect(con)

XY plot

plot_xy function download the data, and output a list which contain a) plot object: plot_ly/ggplot; b) corresponding data tables.

library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
                      Driver = "libtdsodbc.so",     ## Free TDS driver used
                      Server = "128.208.239.15",    ## IP address of the server
                      Database = "Opedia",          ## Database name
                      UID = "ArmLab",               ## User ID
                      PWD = "ArmLab2018",           ## Password
                      Port = 1433)                  ## Port number

# Inpit variable:
# 
table.list <- c('tblSST_AVHRR_OI_NRT', 'tblAltimetry_REP') 
var.list <-  c('sst', 'sla')  
#
range.var <- list()
range.var$lat <- c(25,30)
range.var$lon <- c(-160, -155)
range.var$time <- c('2016-03-29', '2016-05-29')
#
agg.var <- 'time' 

## -----------------------------------
## Dataset from table II
selIndex <- 1
table.name <- table.list[selIndex]                       # Specify table name I
sel.var <- var.list[selIndex]                            # Variable from table name I  

tbl.subset.x <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset.x)
## # A tibble: 6 x 2
##   time         sst
##   <date>     <dbl>
## 1 2016-03-29  21.3
## 2 2016-03-30  20.6
## 3 2016-03-31  20.6
## 4 2016-04-01  20.7
## 5 2016-04-02  20.8
## 6 2016-04-03  20.9
## Dataset from table II
selIndex <- 2
table.name <- table.list[selIndex]                       # Specify table name II
sel.var <- var.list[selIndex]                            # Variable from table name II


tbl.subset.y <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset.y)
## # A tibble: 6 x 2
##   time          sla
##   <date>      <dbl>
## 1 2016-03-29 0.0242
## 2 2016-03-30 0.0228
## 3 2016-03-31 0.0215
## 4 2016-04-01 0.0208
## 5 2016-04-02 0.0197
## 6 2016-04-03 0.0189
## Plot - XY
# out <- plot_xy(con, table.list,var.list,range.var,agg.var,type = 'ggplot')
out <- plot_xy(con, table.list,var.list,range.var,agg.var,type = 'plotly')
out$plot
dbDisconnect(con)